**********************
**********************
*Replication Code for

*Hansen, Eric R. “Mass Higher Education and Voter Turnout in the U.S.” Electoral Studies.

*Compiled by Eric Hansen, 10/13/25 (ehansen4@luc.edu)
*Using Stata 19.5 SE.
**********************
**********************

*******
*State-Level Analyses
*******

*set your working directory
*cd "YourFilepath"

*load the state-level panel (file is expected in the working directory)
use "Hansen_Mass_Higher_Education_Turnout_Replication_Data.dta", clear

*build the analysis variables
*row counter taken before any re-sorting
gen caseid = _n
*midterm is the complement of pres; anything other than 0/1 becomes missing
recode pres (0=1)(1=0)(*=.), gen(midterm)
gen ipol = college_ipol
*multiply the turnout measures by 100
foreach v of varlist lag_vep vepvotingrate delta_vepvotingrate {
	replace `v' = `v'*100
}
*population four rows earlier within the same state (rows ordered by year)
bysort st (year): gen lagpop = poptotal2[_n-4]
gen popchange = 100*(poptotal2-lagpop)/lagpop
gen cost = finalcovi
*income in thousands
gen income = hincomemed/1000
*time counter starting at 0 in 1980; earlier years are left missing
gen time = year - 1980 if year >= 1980
*zeros in vepvoting2 are treated as missing
replace vepvoting2 = . if vepvoting2 == 0
*numeric state identifier for the panel commands
encode st, gen(stID)

*attach display labels used in tables and figures
*key regressor and election context
lab var ipol "College"
lab var schoolyears "Mean Years of Schooling"
lab var midterm "Midterm"
lab var gub_election "Governor on Ballot"
lab var ussen_elect "Senator on Ballot"
lab var prescomp "Competitiveness"
lab var dempreslag "Lagged Democratic Vote Share"
lab var cost "Cost of Voting"
lab var time "Time"
*economic covariates
lab var union_density "Union Density"
lab var unemployment "Unemployment Rate"
lab var income "Median Household Income ($1000s)"
*demographic covariates
lab var medianage "Median Age"
lab var white "% White"
lab var black "% Black"
lab var hispanic "% Hispanic"
lab var pop18change4 "% Population Change"

*declare the panel structure: state (stID) by year
xtset stID year

*Figure 1: College Degree Attainment by State, 2019
*NOTE(review): the header says 2019 but the map is drawn from the year==2020 rows -- confirm which year is intended.
*ssc install spmap
preserve
keep if year==2020
*merge key: the state postal abbreviation under the name used in the geography file
gen STUSPS = st
*geo2xy_us_data.dta is read from the working directory, like geo2xy_picard and the other
*replication files, so the script does not depend on one machine's folder layout
merge 1:1 STUSPS using "geo2xy_us_data.dta"
*keep only states present in both the panel and the geography file
drop if _merge!=3
*create map
*format college_ipol %4.2f
*NOTE(review): clbreaks() makes the top class 40-60, but its legend label reads "40-45%" -- confirm the label.
spmap college_ipol using geo2xy_picard, ///
	clmethod(custom) clbreaks(20 25 30 35 40 60) id(_ID) fcolor(Greys) ///
	legtitle("% Population with 4-Year Degree") ///
	legend(pos(5) row(6) ring(1) size(*1) symx(*1) symy(*1) forcesize ///
		label(2 "20-25%") label(3 "25-30%") label(4 "30-35%") ///
		label(5 "35-40%") label(6 "40-45%"))
restore

*Appendix Table A1: Summary Statistics for State-Level Data
*(run this block as a unit so the local macro is defined)
local sumvars vepvotingrate ipol gub_election ussen_elect midterm prescomp ///
	dempreslag union_density unemployment income medianage white black ///
	hispanic pop18change4 schoolyears cost finalrank cpscollege cpsvote
summarize `sumvars'

*income-education correlation quoted in the text
correlate ipol income

*Figure 2: Predicted State-Level Turnout, 1980-2020
*(run this block as a unit so the local macro is defined)
*helper variables for the rug of "|" marks drawn at y = -0.5
gen where = -0.5
gen pipe = "|"
local controls gub_election ussen_elect midterm prescomp dempreslag ///
	union_density unemployment income medianage white black hispanic pop18change4
quietly xtreg vepvotingrate ipol `controls', fe i(stID) cluster(stID)
*predictions across college attainment with midterm held at 0
margins, at(ipol=(0(5)60) midterm=0)
marginsplot, recast(line) recastci(rarea) yline(0, lcol(black)) ///
	plotopts(lcol(black)) ciopts(col(gs8)) ///
	addplot(scatter where ipol, ms(none) mlabel(pipe) mlabpos(0) mlabcolor(gs0) ///
		xscale(range(0(10)60)) xlabel(0(10)60) xtick(0(10)60) ///
		xtitle("College Attainment Rate", margin(medium)) ///
		ytitle("Predicted Turnout", margin(medium))) ///
	legend(off) graphregion(fcolor(white)) title(" ")

*Table 1: State College Attainment and Voter Turnout, 1980-2020
*(run this block as a unit so the local macro is defined)
local controls gub_election ussen_elect midterm prescomp dempreslag ///
	union_density unemployment income medianage white black hispanic pop18change4
*(1) bivariate state fixed effects
xtreg vepvotingrate ipol, fe i(stID) cluster(stID)
*(2) adds the time-varying controls
xtreg vepvotingrate ipol `controls', fe i(stID) cluster(stID)
*(3) adds a common linear time trend
xtreg vepvotingrate ipol `controls' c.time, fe i(stID) cluster(stID)
*(4) adds state-specific linear time trends
xtreg vepvotingrate ipol `controls' c.time##i.stID, fe i(stID) cluster(stID)

*Appendix Figure A1: Distribution of Coefficients in Multiverse Analysis of FE Model with Controls
*(run this block as a unit so the local macro is defined)
*ssc install multivrs
local controls gub_election ussen_elect midterm prescomp dempreslag ///
	union_density unemployment income medianage white black hispanic pop18change4
multivrs xtreg vepvotingrate ipol `controls', fe

*Appendix Figure A2: Turnout Trends by State, 1980-2020
*one panel per state: raw turnout points plus a linear fit over year
twoway ///
	(scatter vepvotingrate year, xtitle("Year", margin(medium)) ///
		ytitle("Turnout", margin(medium)) mcolor(gs12)) ///
	(lfit vepvotingrate year, lcolor(black) lwidth(thick)), ///
	by(st, legend(off) graphregion(fcolor(white)) title(" ") note(" ") style(compact))

*Appendix Table A2: State Mean Years of Schooling and Voter Turnout, 1980-2020
*(run this block as a unit so the local macro is defined)
*same four specifications as Table 1 with schoolyears in place of ipol
local controls gub_election ussen_elect midterm prescomp dempreslag ///
	union_density unemployment income medianage white black hispanic pop18change4
xtreg vepvotingrate schoolyears, fe i(stID) cluster(stID)
xtreg vepvotingrate schoolyears `controls', fe i(stID) cluster(stID)
xtreg vepvotingrate schoolyears `controls' c.time, fe i(stID) cluster(stID)
xtreg vepvotingrate schoolyears `controls' c.time##i.stID, fe i(stID) cluster(stID)

*Appendix Table A3: Educational Attainment, Turnout, and the Cost of Voting in the 50 States
*(run this block as a unit so the local macro is defined)
*note: this control set does not include midterm
local controls gub_election ussen_elect prescomp dempreslag union_density ///
	unemployment income medianage white black hispanic pop18change4
*one model per cost-of-voting measure: cost first, then finalrank
foreach covi of varlist cost finalrank {
	xtreg vepvotingrate ipol `covi' `controls', fe i(stID) cluster(stID)
}

*Appendix Figure A3: Turnout and Lagged College Attainment Rates
*(run this block as a unit so the local macros are defined)
*ssc install coefplot, replace
local educ ipol ipol_lag10 ipol_lag20 ipol_lag30 ipol_lag40
local controls gub_election ussen_elect midterm prescomp dempreslag ///
	union_density unemployment income medianage white black hispanic pop18change4
quietly xtreg vepvotingrate `educ' `controls', fe i(stID) cluster(stID)
*plot only the current and lagged college coefficients
coefplot, keep(`educ') ///
	coeflabels(ipol_lag10 = "College{subscript:t-10}" ///
		ipol_lag20 = "College{subscript:t-20}" ///
		ipol_lag30 = "College{subscript:t-30}" ///
		ipol_lag40 = "College{subscript:t-40}") ///
	mcolor(black) ciopts(recast(rcap) lcolor(black)) xline(0, lcolor(black)) ///
	xtitle("Coefficient Estimate", margin(medsmall))

*Appendix Table A4: State Educational Attainment and Voter Turnout with Lagged Effects
*same model as the figure, now with the estimates displayed
xtreg vepvotingrate `educ' `controls', fe i(stID) cluster(stID)

*sum of the current and lagged coefficients (long-run effect reported in the appendix text)
lincom ipol + ipol_lag10 + ipol_lag20 + ipol_lag30 + ipol_lag40

*Appendix Figure A4: Diminishing Returns to Higher Education?
*(run this block as a unit so the local macro is defined)
*y position (0) for the rug of "|" marks; reuses the pipe variable created for Figure 2
gen where1 = 0
local controls gub_election ussen_elect midterm prescomp dempreslag ///
	union_density unemployment income medianage white black hispanic pop18change4
*quadratic in college attainment
quietly xtreg vepvotingrate c.ipol##c.ipol `controls', fe i(stID) cluster(stID)
*marginal effect of ipol evaluated along its own range
margins, dydx(ipol) at(ipol=(0(5)60))
marginsplot, recast(line) recastci(rarea) yline(0, lcol(black)) ///
	plotopts(lcol(black)) ciopts(col(gs8)) ///
	addplot(scatter where1 ipol, ms(none) mlabel(pipe) mlabpos(0) mlabcolor(gs0) ///
		xscale(range(0(10)60)) xlabel(0(10)60) xtick(0(10)60) ///
		xtitle("College Attainment Rate", margin(medium)) ///
		ytitle("Marginal Effect of College Attainment Rate", margin(medium))) ///
	legend(off) graphregion(fcolor(white)) title("")

*Appendix Table A5: Diminishing Returns to Educational Attainment?
*(run this block as a unit so the local macros are defined)
*Table 1 specifications with a squared college term
local quad c.ipol##c.ipol
local controls gub_election ussen_elect midterm prescomp dempreslag ///
	union_density unemployment income medianage white black hispanic pop18change4
xtreg vepvotingrate `quad', fe i(stID) cluster(stID)
xtreg vepvotingrate `quad' `controls', fe i(stID) cluster(stID)
xtreg vepvotingrate `quad' `controls' c.time, fe i(stID) cluster(stID)
xtreg vepvotingrate `quad' `controls' c.time##i.stID, fe i(stID) cluster(stID)

*Appendix Table A6: Replication of Main Results Using Self-Reported Turnout from CPS
*(run this block as a unit so the local macro is defined)
*rescale the CPS measures by 100 so they match the Census/election variables
*caution: the rescaling is done in place, so run it only once per load of the data
foreach v of varlist cpscollege cpsvote {
	replace `v' = `v'*100
}
local controls gub_election ussen_elect midterm prescomp dempreslag ///
	union_density unemployment income medianage white black hispanic pop18change4
xtreg cpsvote cpscollege, fe i(stID) cluster(stID)
xtreg cpsvote cpscollege `controls', fe i(stID) cluster(stID)
xtreg cpsvote cpscollege `controls' c.time, fe i(stID) cluster(stID)
xtreg cpsvote cpscollege `controls' c.time##i.stID, fe i(stID) cluster(stID)


*Appendix Figures A5-A7
*These figures rely on individual-level data from the Current Population Survey. The replication code appears further below in the section labeled "Individual Analysis with CPS Data."

*Appendix Figure A8: Marginal Effect of Dem. Vote Share Conditional on College Attainment in Presidential Elections
*Appendix Figure A9: Marginal Effect of Dem. Vote Share Conditional on College Attainment in Midterm Elections
*the loop runs midterm==0 first (Figure A8), then midterm==1 (Figure A9)
forvalues m = 0/1 {
	xtreg vepvotingrate c.ipol##c.dempreslag##i.year if midterm==`m', fe i(stID) cluster(stID)
	margins year, dydx(dempreslag)
	marginsplot, recast(line) yline(0, lcol(black)) plotopts(lcol(black)) ///
		ciopts(col(black)) xtitle("Year", margin(medium)) ///
		ytitle("Marginal Effect of Lagged Dem. Vote Share", margin(medium)) title("")
}

*Appendix Table A7: State Educational Attainment and Voter Turnout, 1980-2016
*(run this block as a unit so the local macro is defined)
*Table 1 models re-estimated without observations after 2017; the full data are restored afterwards
local controls gub_election ussen_elect midterm prescomp dempreslag ///
	union_density unemployment income medianage white black hispanic pop18change4
preserve
keep if year <= 2017
xtreg vepvotingrate ipol, fe i(stID) cluster(stID)
xtreg vepvotingrate ipol `controls', fe i(stID) cluster(stID)
xtreg vepvotingrate ipol `controls' c.time, fe i(stID) cluster(stID)
xtreg vepvotingrate ipol `controls' c.time##i.stID, fe i(stID) cluster(stID)
restore

*Appendix Table A8: Interaction of College and Democratic Vote Share
*(run this block as a unit so the local macros are defined)
*dempreslag enters through the interaction, so it is not repeated in the control list
local inter c.ipol##c.dempreslag
local controls gub_election ussen_elect midterm prescomp union_density ///
	unemployment income medianage white black hispanic pop18change4
xtreg vepvotingrate `inter', fe i(stID) cluster(stID)
xtreg vepvotingrate `inter' `controls', fe i(stID) cluster(stID)
xtreg vepvotingrate `inter' `controls' c.time, fe i(stID) cluster(stID)
xtreg vepvotingrate `inter' `controls' c.time##i.stID, fe i(stID) cluster(stID)
*state-trend model again, without observations after 2017
preserve
keep if year <= 2017
xtreg vepvotingrate `inter' `controls' c.time##i.stID, fe i(stID) cluster(stID)
restore

*Appendix Figure A10: Marginal Effect of College in State-Specific Linear Trend Model
*(run this block as a unit so the local macro is defined)
*y position (-1.5) for the rug of "|" marks; reuses the pipe variable created for Figure 2
gen where2=-1.5
local controls gub_election ussen_elect midterm prescomp union_density ///
	unemployment income medianage white black hispanic pop18change4
quietly xtreg vepvotingrate c.ipol##c.dempreslag `controls' c.time##i.stID, fe i(stID) cluster(stID)
*marginal effect of ipol across values of lagged Democratic vote share
margins, dydx(ipol) at(dempreslag=(0(10)100))
marginsplot, recast(line) recastci(rarea) yline(0, lcol(black)) ///
	plotopts(lcol(black)) ciopts(col(gs8)) ///
	addplot(scatter where2 dempreslag, ms(none) mlabel(pipe) mlabpos(0) mlabcolor(gs0) ///
		xscale(range(0(10)100)) xlabel(0(10)100) xtick(0(10)100) ///
		xtitle("Lagged Dem. Vote Share", margin(medium)) ///
		yscale(range(-1(1)4)) ylabel(-1(1)4) ytick(-1(1)4) ///
		ytitle("Marginal Effect of College Attainment", margin(medium))) ///
	legend(off) graphregion(fcolor(white)) title(" ")

*Appendix Figure A11: College Attainment and Turnout by State
*grey points and the fitted line use years before 2017; red points are years after 2017
twoway ///
	(scatter vepvotingrate ipol if year <2017, ///
		xtitle("College Attainment", margin(medium)) ///
		ytitle("Turnout", margin(medium)) mcolor(gs12)) ///
	(scatter vepvotingrate ipol if year > 2017, mcolor(red)) ///
	(lfit vepvotingrate ipol if year <2017, lcolor(black) lwidth(thick)), ///
	by(st, legend(off) graphregion(fcolor(white)) title(" ") note(" ") style(compact))


********************
****Individual Analysis with CPS Data
********************

*Replicates Appendix Figures A5 - A7

use "Hansen_Mass_Higher_Education_Turnout_CPS.dta", clear

*sample restrictions, applied together:
*  age < 25     respondents who may not have completed their educations yet
*  citizen == 5 non-citizens
*  year < 1980  years before the study period
drop if age < 25 | citizen==5 | year < 1980
*college = 1 for educ codes 110-125, missing for codes 0, 1 and 999, 0 otherwise
recode educ (0/1=.)(999=.)(110/125=1)(*=0), gen(college)
*vote = 1 when voted is 2, 0 when voted is 1, missing otherwise
recode voted (1=0)(2=1)(*=.), gen(vote)

*graph scheme for the CPS figures
*NOTE(review): white_cividis is not a built-in scheme; presumably it comes from the schemepack package -- confirm
*ssc install schemepack, replace
set scheme white_cividis

*presidential = 1 in the listed election years, 0 in every other year
gen presidential = inlist(year, 1980, 1984, 1988, 1992, 1996, 2000, 2004, 2008, 2012, 2016, 2020)

*four-way grouping of respondents:
*  1 = college, presidential      2 = college, non-presidential
*  3 = non-college, presidential  4 = non-college, non-presidential
*left missing when college is missing
gen educelect = cond(college==1, 1, 3) + (presidential==0) if inlist(college, 0, 1)

*Appendix Figure A5: Self-Reported Turnout by College Degree Holding
*mean of vote within each educelect-by-year cell, stored on every observation
egen turnout = mean(vote), by(educelect year)
label define educelect ///
	1 "College, Presidential" ///
	2 "College, Midterm" ///
	3 "Non-college, Presidential" ///
	4 "Non-college, Midterm"
label values educelect educelect
*one turnout series per group: turnout1 ... turnout4
separate turnout, by(educelect)
twoway ///
	(connect turnout? year, lcolor(black black gs8 gs8) ///
		mcolor(black black gs12 gs12) ///
		lpattern(solid shortdash solid shortdash)), ///
	ytitle("Turnout", margin(medsmall)) xtitle("Year", margin(medsmall)) ///
	ysc(range(0(.20)1)) ylabel(0(.20)1) ytick(0(.20)1) legend(pos(6) col(2))

*State-level turnout series for Appendix Figures A6 and A7.
*The pooled turnout1-turnout4 series are averaged over educelect and year only, so they
*take the same value in every state and would draw the identical national line in each
*by(statecensus) panel. The means are therefore computed within state as well.
egen state_turnout = mean(vote), by(statecensus educelect year)
*one series per group: state_turnout1 ... state_turnout4
separate state_turnout, by(educelect)
*plot one observation per state-group-year instead of every respondent
egen byte state_tag = tag(statecensus educelect year)

*Appendix Figure A6: State Turnout Trends by Educational Attainment in Presidential Elections
*sort draws each line in year order regardless of how the data are ordered
twoway connect state_turnout1 state_turnout3 year if state_tag, sort ///
	by(statecensus, note(" ")) lcolor(black black) mcolor(black black) ///
	lpattern(solid shortdash) ytitle("Turnout", margin(medsmall)) ///
	xtitle("Year", margin(medsmall)) ysc(range(0(.20)1)) ylabel(0(.20)1) ///
	ytick(0(.20)1) legend(pos(6) col(2))

*Appendix Figure A7: State Turnout Trends by Educational Attainment in Midterm Elections
twoway connect state_turnout2 state_turnout4 year if state_tag, sort ///
	by(statecensus, note(" ")) lcolor(black black) mcolor(black black) ///
	lpattern(solid shortdash) ytitle("Turnout", margin(medsmall)) ///
	xtitle("Year", margin(medsmall)) ysc(range(0(.20)1)) ylabel(0(.20)1) ///
	ytick(0(.20)1) legend(pos(6) col(2))


***********************
****County-Level Analyses
***********************

*load the county-level panel
use "Hansen_Mass_Higher_Education_Turnout_County_Data.dta", clear

*exclude Alaska & Hawaii (state FIPS codes 2 and 15)
drop if inlist(FIPSStateCode, 2, 15)
*keep only years before 2009
drop if year >= 2009
*time counter starting at 0 in 1976
gen time = year - 1976
*previous-row values within county (rows ordered by year)
bysort county_fips (year): gen lagpop = pop[_n-1]
*percent change in population relative to the previous row
gen popchange = (pop-lagpop)/lagpop*100
bysort county_fips (year): gen demlag = demvoteshare[_n-1]
*declare the panel structure: county by year
xtset county_fips year

*Appendix Table A9: Summary Statistics for County-Level Data
*(run this block as a unit so the local macro is defined)
local controls GubElection SenElection closeness demlag Unemploy_County_new PcntBlack popchange
summarize Turnout college_ipol `controls'

*Figure 3: Predicted County-Level Turnout in Presidential Elections, 1976-2008
*helper variables for the rug of "|" marks drawn at y = -0.5
gen where = -0.5
gen pipe = "|"
quietly xtreg Turnout college_ipol `controls', fe i(county_fips) cluster(county_fips)
*predictions across the range of county college attainment
margins, at(college_ipol=(0(5)80))
marginsplot, recast(line) recastci(rarea) yline(0, lcol(black)) ///
	plotopts(lcol(black)) ciopts(col(gs8)) ///
	addplot(scatter where college_ipol, ms(none) mlabel(pipe) mlabpos(0) mlabcolor(gs0) ///
		xscale(range(0(10)80)) xlabel(0(10)80) xtick(0(10)80) ///
		xtitle("College Attainment Rate", margin(medium)) ///
		yscale(range(0(20)100)) ylabel(0(20)100) ytick(0(20)100) ///
		ytitle("Predicted Turnout", margin(medium))) ///
	legend(off) graphregion(fcolor(white)) title(" ")

*Appendix Table A10: County Educational Attainment and Voter Turnout, 1976-2008
*(run this block as a unit so the local macro is defined)
local controls GubElection SenElection closeness demlag Unemploy_County_new PcntBlack popchange
*(1) bivariate county fixed effects, restricted to years before 2009
xtreg Turnout college_ipol if year<2009, fe i(county_fips) cluster(county_fips)
*(2) adds the time-varying controls
xtreg Turnout college_ipol `controls', fe i(county_fips) cluster(county_fips)
*(3) adds a linear time trend
xtreg Turnout college_ipol `controls' c.time, fe i(county_fips) cluster(county_fips)